* Start from an empty dataset
clear

* (Re)load the helper program used below to standardize precinct names
capture program drop superclean_basic
include "codes/auxiliary_programs/superclean_basic.ado"

*--------------------------------------------------*
* 2016 ELECTORAL data, BOLIVIA only                 *
*--------------------------------------------------*
* Pipe-delimited file of 2016 referendum vote totals
import delimited "data/inputs/1_Bolivia_votos_totales_ref_2016.csv", ///
	delimiters("|") clear

* Replace the shipped recinto code with the first three characters of the
* mesa code.
* NOTE(review): the original author flagged this derivation with "(?)" --
* confirm that a three-character prefix really identifies the recinto.
drop codigorecinto
tostring codigomesa, replace
generate codigorecinto = substr(codigomesa, 1, 3)
destring codigorecinto, replace

* Retain the geographic identifiers, the precinct name/code, and the block
* of vote-count columns running from "no" through "validos"
keep codigodepartamento codigoprovincia codigomunicipio ///
	nombrerecinto codigorecinto no-validos

* Best effort: repair the mis-encoded "sí" column name when it is present
capture rename sã si

* Standardize the precinct name
superclean_basic nombrerecinto

compress

* Aggregate the vote counts to one row per precinct
local precinct_id_2016 codigodepartamento codigoprovincia codigomunicipio nombrerecinto codigorecinto
collapse (sum) no-validos, by(`precinct_id_2016')

* Flag precincts that are not unique on the keys later used to merge with
* the 2016 padrón (department, province, municipality, inscritos).
* tag == 0 means the key combination is unique.
tempfile bolivia_2016_recinto bolivia_2016_recinto_dups
duplicates tag codigodepartamento codigoprovincia codigomunicipio inscritos, generate(tag)

* Unique observations go to one tempfile ...
preserve
	drop if tag != 0
	drop tag
	compress
	sort codigodepartamento codigoprovincia codigomunicipio nombrerecinto
	save `bolivia_2016_recinto'
restore

* ... and the ambiguous (duplicated) ones go to another
drop if tag == 0
drop tag
compress
sort codigodepartamento codigoprovincia codigomunicipio nombrerecinto
save `bolivia_2016_recinto_dups'

*---------------------*
* Loading Padron 2016 *
*---------------------*
* Bolivia: mesa-level voter roll, header row taken from the first row of
* the selected cell range
import excel using "data/inputs/1_Bolivia_padron_ref_const_2016.xlsx", ///
	clear first sheet("Ciu_hab_por mesa NACIONAL") cellrange("A5:P29229")

* Using lowercase names for variables
rename *, lower

* Keep an untouched copy of the voter count; ciudadanos is adjusted below
gen ciud_orig=ciudadanos 
 
* The recinto code is the first 16 characters of the mesa number
gen codrecinto=substr(nummesa,1,16) 
keep dep prov sec ciudadanos nombrerecinto ciud_orig nummesa codrecinto

* Standardizing the precinct name
superclean_basic nombrerecinto

compress

* Modifying the province code (prefix it with the department), and
* generating a municipality code as province code followed by "0" + sec
replace prov=prov+100*dep
*---*
tostring sec, gen(sec2)
* NOTE(review): this yields a two-character suffix only while sec < 10; a
* value of 10 or more would produce three characters ("010") -- confirm
* that sec never exceeds 9 in this file.
replace sec2="0"+sec2
tostring prov, gen(prov2)
gen codmun=prov2+sec2
order codmun, after(prov)
destring codmun, replace
drop sec2 prov2 sec

* Collapsing at the precinct level (summing voters over mesas and keeping
* the first precinct name within each recinto code)
collapse (sum) ciudadanos ciud_orig (first) nombrerecinto, by(dep prov codmun codrecinto)

* In these eight recintos, the number of inscritos is off by one, so the
* count is reduced by one (ciud_orig keeps the unadjusted total)
foreach cod in "1003203096409046" "1003203089706006" "1003202060102864" ///
			   "1003202065402314" "1003207223406010" "1003207223410020" ///
		       "1003207225411024" "1003208278400006" {
	replace ciudadanos = ciudadanos - 1	if codrecinto == "`cod'"		   	
} 

* Flag padrón recintos that share department, province, municipality and
* voter count with another recinto (tag == 0 means the combination is unique)
tempfile padron_bolivia padron_bolivia_dups
duplicates tag dep prov codmun ciudadanos, generate(tag)

* Store the recintos that are unique on those keys
preserve
	drop if tag != 0
	drop tag
	save `padron_bolivia'
restore

* Store the ambiguous recintos separately
drop if tag == 0
drop tag
compress
sort dep prov codmun nombrerecinto
save `padron_bolivia_dups'

*-----------------------------------------------*
* Merging the non-problematic datasets (tag==0) *
*-----------------------------------------------*
* Padrón recintos that are unique on (department, province, municipality,
* voter count)
use `padron_bolivia', clear

* Align the padrón key names with those of the electoral data
rename dep codigodepartamento
rename prov codigoprovincia
rename codmun codigomunicipio
rename ciudadanos inscritos
merge 1:1 codigodepartamento codigoprovincia codigomunicipio inscritos using `bolivia_2016_recinto'

* Saving the observations that merge
* (_merge is written out in full throughout so the script does not depend
*  on variable-name abbreviation being enabled)
tempfile merged1
savesome if _merge == 3 using `merged1' 

* Artificial split recintos in the electoral data: electoral-only rows are
* summed by precinct name within municipality, and only the combinations
* that are then unique on the merge keys are kept
preserve
	keep if _merge == 2
	collapse (sum) inscritos no-validos, ///
			  by(codigodepartamento codigoprovincia codigomunicipio nombrerecinto)
	duplicates tag codigodepartamento codigoprovincia codigomunicipio inscritos, gen(tag)
	keep if tag == 0
	drop tag
	tempfile artificial_splits_2016electoral
	save `artificial_splits_2016electoral'
restore

* Merge (collapsed) fake duplicates back to unmatched 2016 padrón observations
keep if _merge == 1
keep codigodepartamento codigoprovincia codigomunicipio inscritos nombrerecinto codrecinto
merge 1:1 codigodepartamento codigoprovincia codigomunicipio inscritos using `artificial_splits_2016electoral'
tempfile merged2
* The second merge indicator is stored under the name "merged"
rename _merge merged
savesome if merged == 3 using `merged2' 

*-------------------------------------------*
* Merging the problematic datasets (tag!=0) *
*-------------------------------------------*
* Padrón recintos that were NOT unique on the basic merge keys
use `padron_bolivia_dups', clear

* Merging with the problematic observations from the 2016 electoral dataset;
* the standardized precinct name is added to the key to tell them apart
rename dep codigodepartamento
rename prov codigoprovincia
rename codmun codigomunicipio
rename ciudadanos inscritos
merge 1:1 codigodepartamento codigoprovincia codigomunicipio inscritos nombrerecinto using `bolivia_2016_recinto_dups'

* Keep only the matched rows (_merge written in full so the script does not
* depend on variable-name abbreviation being enabled)
tempfile merged3
savesome if _merge == 3 using `merged3'

*------------------------------------------------------*
* Appending the successfully merged resulting datasets *
*------------------------------------------------------*
* Stack the three sets of matched padrón/electoral recintos
use `merged1', clear
append using `merged2'
append using `merged3'

rename nombrerecinto recinto2016
* Drop both merge indicators: _merge (from merged1/merged3) and merged
* (from merged2). Names are written in full so this does not depend on
* variable-name abbreviation being enabled.
drop _merge merged
tempfile e2016_with_codrecinto
save `e2016_with_codrecinto'

*-----------------------------------------*
* 2019 actas vs. 2016 Padron+Electoral    *
*-----------------------------------------*
* Read the acta spreadsheet, taking variable names from its first row
import excel using "data/inputs/1_Bolivia_acta.2021.12.03.16.13.58.xlsx", ///
	firstrow clear

* Restrict to the presidential race inside Bolivia and to the mesa identifiers
keep if Elección=="Presidente y Vicepresidente" & País=="Bolivia"
keep NúmeroMesa CódigoMesa Recinto

* Recinto code = first 16 characters of the mesa code
generate codrecinto = substr(CódigoMesa, 1, 16)

* Attach the 2016 recinto-level data to every mesa of the recinto
merge m:1 codrecinto using `e2016_with_codrecinto'

* Set aside the mesas matched through our own crosswalk, suffixing the
* 2016 vote columns with "2016"
* NOTE(review): whether inscritos lies inside the no-validos range depends
* on the column order at this point -- confirm it is kept if it is needed.
tempfile 2019_2016_ours
preserve
	drop if _merge != 3
	keep NúmeroMesa CódigoMesa recinto2016 no-validos
	renvars no-validos, postfix(2016)
	compress
	save `2019_2016_ours'
restore 

*----------------------------------------------*
* Rosnick crosswalk for the unmatched mesas     *
*----------------------------------------------*
* Continue with the 2019 mesas that found no 2016 counterpart above
drop if _merge != 1
drop _merge

keep NúmeroMesa CódigoMesa Recinto

* Split the mesa code into its positional components:
*   chars 3-5 -> idpais, 6-7 -> idDep, 8-11 -> idLoc,
*   chars 12-16 -> idReci, 17-18 -> mesa
generate idpais = substr(CódigoMesa, 3, 3)
generate idDep  = substr(CódigoMesa, 6, 2)
generate idLoc  = substr(CódigoMesa, 8, 4)
generate idReci = substr(CódigoMesa, 12, 5)
generate mesa   = substr(CódigoMesa, 17, 2)
destring idpais-mesa, replace

* Note: this is the 2016:2019 merge shared by David Rosnick
merge m:1 idpais idDep idLoc idReci using "data/inputs/1_Bolivia_2016-2019_alternate_merge.dta"

* Discard crosswalk rows that have no 2019 mesa, then the indicator itself
keep if _merge != 2
drop _merge

* Rename the m-prefixed 2016 columns to lowercase names ending in "2016"
* (mSÍ is first given an unaccented name so it fits the loop)
rename mSÍ mSI
foreach stem in Recinto NO SI BLANCOS EMITIDOS INSCRITOS NULOS VALIDOS {
	local lc = lower("`stem'")
	rename m`stem' `lc'2016
}

keep NúmeroMesa CódigoMesa recinto2016-validos2016

* Standardize the 2016 precinct name
superclean_basic recinto2016

* Mark these rows as matched through the Rosnick crosswalk
generate dr = 1

*-----------------------------------------*
* Combine both sets of matched mesas      *
*-----------------------------------------*
* Add the mesas matched through our own crosswalk; they arrive without a
* value for dr, so they are coded 0
append using `2019_2016_ours'
replace dr = 0 if missing(dr)

* Sanity check on the expected total number of mesas
assert _N == 33048

* dr2016 == 1: matched via the Rosnick crosswalk; 0: matched via ours
rename dr dr2016

*---
